suppressPackageStartupMessages(library(dplyr))
library(readr)
suppressPackageStartupMessages(library(lubridate))
library(ggplot2)
# Load the downtown Boulder weather file, letting readr guess every column
# type, then print a compact overview of the columns.
wea <- '/Users/Andy/Google Drive/boco-jail/downtown-boulder-weather.csv' %>%
  read_csv(col_types = cols())
dplyr::glimpse(wea)
Observations: 6,400
Variables: 12
$ DATE <int> 20000101, 20000102, 20000103, 20000104, 20000105, 20000106, 200001...
$ PRCP <dbl> 0.00, 0.00, 0.08, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, ...
$ SNOW <dbl> 0.0, 0.0, 2.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...
$ SNWD <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ TMAX <dbl> 54, 40, 36, 49, 47, 42, 47, 50, 45, 42, 61, 58, 41, 64, 63, 49, 64...
$ TMIN <dbl> 29, 22, 19, 13, 26, 16, 19, 23, 29, 30, 17, 41, 25, 25, 34, 22, 30...
$ WT01 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ WT03 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ WT04 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ WT05 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ WT06 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ WT11 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
# Per-column summary statistics (and NA counts) for the raw weather data
base::summary(object = wea)
DATE PRCP SNOW SNWD
Min. :20000101 Min. :0.00000 Min. : 0.0000 Length:6400
1st Qu.:20040519 1st Qu.:0.00000 1st Qu.: 0.0000 Class :character
Median :20081004 Median :0.00000 Median : 0.0000 Mode :character
Mean :20083356 Mean :0.05655 Mean : 0.2372
3rd Qu.:20130222 3rd Qu.:0.01000 3rd Qu.: 0.0000
Max. :20170714 Max. :9.08000 Max. :16.7000
NA's :5 NA's :1190
TMAX TMIN WT01 WT03 WT04
Min. : 6.00 Min. :-17.00 Min. :1 Min. :1 Length:6400
1st Qu.: 53.00 1st Qu.: 27.00 1st Qu.:1 1st Qu.:1 Class :character
Median : 67.00 Median : 38.00 Median :1 Median :1 Mode :character
Mean : 65.99 Mean : 38.17 Mean :1 Mean :1
3rd Qu.: 81.00 3rd Qu.: 51.00 3rd Qu.:1 3rd Qu.:1
Max. :102.00 Max. : 77.00 Max. :1 Max. :1
NA's :1 NA's :3 NA's :6304 NA's :5623
WT05 WT06 WT11
Min. :1 Min. :1 Min. :1
1st Qu.:1 1st Qu.:1 1st Qu.:1
Median :1 Median :1 Median :1
Mean :1 Mean :1 Mean :1
3rd Qu.:1 3rd Qu.:1 3rd Qu.:1
Max. :1 Max. :1 Max. :1
NA's :6337 NA's :6370 NA's :6331
# Lower-case the column names and parse the yyyymmdd integer dates into Dates.
names(wea) <- tolower(names(wea))
wea$date <- lubridate::ymd(wea$date)
# Keep only the date plus the precipitation, snow and temperature columns.
wea <- wea %>% select(date, prcp, snow, tmax, tmin)
head(wea)
# Daily maximum temperature over time.
wea %>%
  ggplot(aes(date, tmax)) +
  geom_point() +
  ylab('Max Temp') +
  ggtitle('Downtown Boulder Weather')
# Daily precipitation, zoomed to the 0-3 range. coord_cartesian() only zooms
# the view; ylim() would drop every row above the limit (with a warning).
wea %>%
  ggplot(aes(date, prcp)) +
  geom_point() +
  coord_cartesian(ylim = c(0, 3)) +
  ylab('Precipitation') +
  ggtitle('Downtown Boulder Weather')
# Daily snowfall, zoomed to the 0-3 range.
# NOTE(review): this window is the same as the precipitation plot, while the
# summary of the data reports snow values up to 16.7 -- confirm that hiding
# everything above 3 is intended.
wea %>%
  ggplot(aes(date, snow)) +
  geom_point() +
  coord_cartesian(ylim = c(0, 3)) +
  ylab('Snowfall') +
  ggtitle('Downtown Boulder Weather')
Some info on the bookings-with-transient-status.csv data:

- fta is “Failure to Appear”
- ftc is “Failure to Comply”
- (Sam) These are bookings in Boulder County Jail only.
- (Sam) Each row is an individual booking.
- (Sam) boulder means the arrest was made by Boulder PD.
- (Sam) Column transient indicates whether someone was homeless or not.
- (Sam) I created indicator variables for a variety of antihomeless charges (a list that a local law school came up with). antihomeless is true if any of the antihomeless charges existed in the booking. So one source of error could be that if someone was arrested for an antihomeless charge AND a more serious offense, I still mark it as “antihomeless”.
- (Sam) I don’t remember making any_antihomeless – I suspect it’s identical to antihomeless and was introduced by accident.
- (Andy) There are two booking-time columns (Booking Time and booking_time)? I will use booking_time for now, as it seems better.
- (Andy) What are locations? Is that where they were booked?
- (Andy) Booked and booking_time appear to be duplicates?
# Read the jail bookings file.
# `Case No` is declared as a double: many of its values end in ".0", which the
# guessed integer type cannot parse (tens of thousands of parsing failures
# otherwise). All other column types are still guessed by readr.
# progress = FALSE keeps the progress bar out of the rendered output.
bk <- read_csv(
  '/Users/Andy/Google Drive/boco-jail/bookings-with-transient-status.csv',
  col_types = cols(`Case No` = col_double()),
  progress = FALSE
)
| | 0%
| | 0%
| | 1%
|= | 1%
|= | 1%
|= | 1%
|= | 1%
|= | 2%
|= | 2%
|= | 2%
|== | 2%
|== | 2%
|== | 3% 1 MB
|== | 3% 1 MB
|== | 3% 1 MB
|== | 3% 1 MB
|== | 3% 1 MB
|== | 4% 1 MB
|== | 4% 1 MB
|=== | 4% 1 MB
|=== | 4% 1 MB
|=== | 4% 1 MB
|=== | 5% 1 MB
|=== | 5% 1 MB
|=== | 5% 1 MB
|=== | 5% 1 MB
|=== | 5% 1 MB
|==== | 5% 2 MB
|==== | 6% 2 MB
|==== | 6% 2 MB
|==== | 6% 2 MB
|==== | 6% 2 MB
|==== | 6% 2 MB
|==== | 7% 2 MB
|===== | 7% 2 MB
|===== | 7% 2 MB
|===== | 7% 2 MB
|===== | 7% 2 MB
|===== | 8% 2 MB
|===== | 8% 2 MB
|===== | 8% 2 MB
|====== | 8% 2 MB
|====== | 8% 2 MB
|====== | 9% 3 MB
|====== | 9% 3 MB
|====== | 9% 3 MB
|====== | 9% 3 MB
|====== | 9% 3 MB
|====== | 10% 3 MB
|======= | 10% 3 MB
|======= | 10% 3 MB
|======= | 10% 3 MB
|======= | 10% 3 MB
|======= | 11% 3 MB
|======= | 11% 3 MB
|======= | 11% 3 MB
|======== | 11% 3 MB
|======== | 11% 3 MB
|======== | 12% 4 MB
|======== | 12% 4 MB
|======== | 12% 4 MB
|======== | 12% 4 MB
|======== | 12% 4 MB
|======== | 12% 4 MB
|========= | 13% 4 MB
|========= | 13% 4 MB
|========= | 13% 4 MB
|========= | 13% 4 MB
|========= | 13% 4 MB
|========= | 14% 4 MB
|========= | 14% 4 MB
|========== | 14% 4 MB
|========== | 14% 4 MB
|========== | 14% 4 MB
|========== | 15% 5 MB
|========== | 15% 5 MB
|========== | 15% 5 MB
|========== | 15% 5 MB
|========== | 15% 5 MB
|=========== | 16% 5 MB
|=========== | 16% 5 MB
|=========== | 16% 5 MB
|=========== | 16% 5 MB
|=========== | 16% 5 MB
|=========== | 17% 5 MB
|=========== | 17% 5 MB
|============ | 17% 5 MB
|============ | 17% 5 MB
|============ | 17% 5 MB
|============ | 18% 6 MB
|============ | 18% 6 MB
|============ | 18% 6 MB
|============ | 18% 6 MB
|============= | 18% 6 MB
|============= | 19% 6 MB
|============= | 19% 6 MB
|============= | 19% 6 MB
|============= | 19% 6 MB
|============= | 19% 6 MB
|============= | 20% 6 MB
|============= | 20% 6 MB
|============== | 20% 6 MB
|============== | 20% 6 MB
|============== | 20% 6 MB
|============== | 20% 7 MB
|============== | 21% 7 MB
|============== | 21% 7 MB
|============== | 21% 7 MB
|=============== | 21% 7 MB
|=============== | 21% 7 MB
|=============== | 22% 7 MB
|=============== | 22% 7 MB
|=============== | 22% 7 MB
|=============== | 22% 7 MB
|=============== | 22% 7 MB
|=============== | 23% 7 MB
|================ | 23% 7 MB
|================ | 23% 7 MB
|================ | 23% 7 MB
|================ | 23% 7 MB
|================ | 24% 8 MB
|================ | 24% 8 MB
|================ | 24% 8 MB
|================= | 24% 8 MB
|================= | 24% 8 MB
|================= | 25% 8 MB
|================= | 25% 8 MB
|================= | 25% 8 MB
|================= | 25% 8 MB
|================= | 25% 8 MB
|================= | 26% 8 MB
|================== | 26% 8 MB
|================== | 26% 8 MB
|================== | 26% 8 MB
|================== | 26% 8 MB
|================== | 26% 9 MB
|================== | 27% 9 MB
|================== | 27% 9 MB
|=================== | 27% 9 MB
|=================== | 27% 9 MB
|=================== | 27% 9 MB
|=================== | 28% 9 MB
|=================== | 28% 9 MB
|=================== | 28% 9 MB
|=================== | 28% 9 MB
|=================== | 28% 9 MB
|==================== | 29% 9 MB
|==================== | 29% 9 MB
|==================== | 29% 9 MB
|==================== | 29% 9 MB
|==================== | 29% 10 MB
|==================== | 30% 10 MB
|==================== | 30% 10 MB
|===================== | 30% 10 MB
|===================== | 30% 10 MB
|===================== | 30% 10 MB
|===================== | 31% 10 MB
|===================== | 31% 10 MB
|===================== | 31% 10 MB
|===================== | 31% 10 MB
|===================== | 31% 10 MB
|====================== | 32% 10 MB
|====================== | 32% 10 MB
|====================== | 32% 10 MB
|====================== | 32% 10 MB
|====================== | 32% 10 MB
|====================== | 33% 11 MB
|====================== | 33% 11 MB
|======================= | 33% 11 MB
|======================= | 33% 11 MB
|======================= | 33% 11 MB
|======================= | 34% 11 MB
|======================= | 34% 11 MB
|======================= | 34% 11 MB
|======================= | 34% 11 MB
|======================== | 34% 11 MB
|======================== | 34% 11 MB
|======================== | 35% 11 MB
|======================== | 35% 11 MB
|======================== | 35% 11 MB
|======================== | 35% 11 MB
|======================== | 35% 12 MB
|======================== | 36% 12 MB
|========================= | 36% 12 MB
|========================= | 36% 12 MB
|========================= | 36% 12 MB
|========================= | 36% 12 MB
|========================= | 37% 12 MB
|========================= | 37% 12 MB
|========================= | 37% 12 MB
|========================== | 37% 12 MB
|========================== | 37% 12 MB
|========================== | 38% 12 MB
|========================== | 38% 12 MB
|========================== | 38% 12 MB
|========================== | 38% 12 MB
|========================== | 38% 13 MB
|========================== | 39% 13 MB
|=========================== | 39% 13 MB
|=========================== | 39% 13 MB
|=========================== | 39% 13 MB
|=========================== | 39% 13 MB
|=========================== | 40% 13 MB
|=========================== | 40% 13 MB
|=========================== | 40% 13 MB
|============================ | 40% 13 MB
|============================ | 40% 13 MB
|============================ | 41% 13 MB
|============================ | 41% 13 MB
|============================ | 41% 13 MB
|============================ | 41% 13 MB
|============================ | 41% 13 MB
|============================ | 42% 14 MB
|============================= | 42% 14 MB
|============================= | 42% 14 MB
|============================= | 42% 14 MB
|============================= | 42% 14 MB
|============================= | 42% 14 MB
|============================= | 43% 14 MB
|============================= | 43% 14 MB
|============================== | 43% 14 MB
|============================== | 43% 14 MB
|============================== | 43% 14 MB
|============================== | 44% 14 MB
|============================== | 44% 14 MB
|============================== | 44% 14 MB
|============================== | 44% 14 MB
|=============================== | 44% 15 MB
|=============================== | 45% 15 MB
|=============================== | 45% 15 MB
|=============================== | 45% 15 MB
|=============================== | 45% 15 MB
|=============================== | 45% 15 MB
|=============================== | 46% 15 MB
|=============================== | 46% 15 MB
|================================ | 46% 15 MB
|================================ | 46% 15 MB
|================================ | 46% 15 MB
|================================ | 47% 15 MB
|================================ | 47% 15 MB
|================================ | 47% 15 MB
|================================ | 47% 15 MB
|================================= | 47% 16 MB
|================================= | 48% 16 MB
|================================= | 48% 16 MB
|================================= | 48% 16 MB
|================================= | 48% 16 MB
|================================= | 48% 16 MB
|================================= | 49% 16 MB
|================================= | 49% 16 MB
|================================== | 49% 16 MB
|================================== | 49% 16 MB
|================================== | 49% 16 MB
|================================== | 50% 16 MB
|================================== | 50% 16 MB
|================================== | 50% 16 MB
|================================== | 50% 16 MB
|=================================== | 50% 16 MB
|=================================== | 50% 17 MB
|=================================== | 51% 17 MB
|=================================== | 51% 17 MB
|=================================== | 51% 17 MB
|=================================== | 51% 17 MB
|=================================== | 51% 17 MB
|=================================== | 52% 17 MB
|==================================== | 52% 17 MB
|==================================== | 52% 17 MB
|==================================== | 52% 17 MB
|==================================== | 52% 17 MB
|==================================== | 53% 17 MB
|==================================== | 53% 17 MB
|==================================== | 53% 17 MB
|===================================== | 53% 17 MB
|===================================== | 53% 18 MB
|===================================== | 54% 18 MB
|===================================== | 54% 18 MB
|===================================== | 54% 18 MB
|===================================== | 54% 18 MB
|===================================== | 54% 18 MB
|====================================== | 55% 18 MB
|====================================== | 55% 18 MB
|====================================== | 55% 18 MB
|====================================== | 55% 18 MB
|====================================== | 55% 18 MB
|====================================== | 56% 18 MB
|====================================== | 56% 18 MB
|======================================= | 56% 18 MB
|======================================= | 56% 18 MB
|======================================= | 56% 19 MB
|======================================= | 57% 19 MB
|======================================= | 57% 19 MB
|======================================= | 57% 19 MB
|======================================= | 57% 19 MB
|======================================= | 57% 19 MB
|======================================== | 58% 19 MB
|======================================== | 58% 19 MB
|======================================== | 58% 19 MB
|======================================== | 58% 19 MB
|======================================== | 58% 19 MB
|======================================== | 59% 19 MB
|======================================== | 59% 19 MB
|========================================= | 59% 19 MB
|========================================= | 59% 20 MB
|========================================= | 59% 20 MB
|========================================= | 60% 20 MB
|========================================= | 60% 20 MB
|========================================= | 60% 20 MB
|========================================= | 60% 20 MB
|========================================== | 60% 20 MB
|========================================== | 61% 20 MB
|========================================== | 61% 20 MB
|========================================== | 61% 20 MB
|========================================== | 61% 20 MB
|========================================== | 61% 20 MB
|========================================== | 62% 20 MB
|=========================================== | 62% 20 MB
|=========================================== | 62% 20 MB
|=========================================== | 62% 21 MB
|=========================================== | 62% 21 MB
|=========================================== | 63% 21 MB
|=========================================== | 63% 21 MB
|=========================================== | 63% 21 MB
|============================================ | 63% 21 MB
|============================================ | 63% 21 MB
|============================================ | 64% 21 MB
|============================================ | 64% 21 MB
|============================================ | 64% 21 MB
|============================================ | 64% 21 MB
|============================================ | 65% 21 MB
|============================================ | 65% 21 MB
|============================================= | 65% 21 MB
|============================================= | 65% 21 MB
|============================================= | 65% 22 MB
|============================================= | 66% 22 MB
|============================================= | 66% 22 MB
|============================================= | 66% 22 MB
|============================================= | 66% 22 MB
|============================================== | 66% 22 MB
|============================================== | 67% 22 MB
|============================================== | 67% 22 MB
|============================================== | 67% 22 MB
|============================================== | 67% 22 MB
|============================================== | 67% 22 MB
|============================================== | 68% 22 MB
|=============================================== | 68% 22 MB
|=============================================== | 68% 22 MB
|=============================================== | 68% 22 MB
|=============================================== | 68% 23 MB
|=============================================== | 69% 23 MB
|=============================================== | 69% 23 MB
|=============================================== | 69% 23 MB
|================================================ | 69% 23 MB
|================================================ | 69% 23 MB
|================================================ | 70% 23 MB
|================================================ | 70% 23 MB
|================================================ | 70% 23 MB
|================================================ | 70% 23 MB
|================================================ | 70% 23 MB
|================================================= | 71% 23 MB
|================================================= | 71% 23 MB
|================================================= | 71% 23 MB
|================================================= | 71% 23 MB
|================================================= | 71% 24 MB
|================================================= | 72% 24 MB
|================================================= | 72% 24 MB
|================================================== | 72% 24 MB
|================================================== | 72% 24 MB
|================================================== | 72% 24 MB
|================================================== | 73% 24 MB
|================================================== | 73% 24 MB
|================================================== | 73% 24 MB
|================================================== | 73% 24 MB
|================================================== | 73% 24 MB
|=================================================== | 74% 24 MB
|=================================================== | 74% 24 MB
|=================================================== | 74% 24 MB
|=================================================== | 74% 25 MB
|=================================================== | 74% 25 MB
|=================================================== | 75% 25 MB
|=================================================== | 75% 25 MB
|==================================================== | 75% 25 MB
|==================================================== | 75% 25 MB
|==================================================== | 75% 25 MB
|==================================================== | 76% 25 MB
|==================================================== | 76% 25 MB
|==================================================== | 76% 25 MB
|==================================================== | 76% 25 MB
|===================================================== | 76% 25 MB
|===================================================== | 77% 25 MB
|===================================================== | 77% 25 MB
|===================================================== | 77% 25 MB
|===================================================== | 77% 26 MB
|===================================================== | 77% 26 MB
|===================================================== | 78% 26 MB
|====================================================== | 78% 26 MB
|====================================================== | 78% 26 MB
|====================================================== | 78% 26 MB
|====================================================== | 79% 26 MB
|====================================================== | 79% 26 MB
|====================================================== | 79% 26 MB
|====================================================== | 79% 26 MB
|======================================================= | 79% 26 MB
|======================================================= | 80% 26 MB
|======================================================= | 80% 26 MB
|======================================================= | 80% 26 MB
|======================================================= | 80% 26 MB
|======================================================= | 80% 27 MB
|======================================================= | 81% 27 MB
|======================================================== | 81% 27 MB
|======================================================== | 81% 27 MB
|======================================================== | 81% 27 MB
|======================================================== | 81% 27 MB
|======================================================== | 82% 27 MB
|======================================================== | 82% 27 MB
|======================================================== | 82% 27 MB
|========================================================= | 82% 27 MB
|========================================================= | 82% 27 MB
|========================================================= | 83% 27 MB
|========================================================= | 83% 27 MB
|========================================================= | 83% 27 MB
|========================================================= | 83% 28 MB
|========================================================= | 83% 28 MB
|========================================================== | 84% 28 MB
|========================================================== | 84% 28 MB
|========================================================== | 84% 28 MB
|========================================================== | 84% 28 MB
|========================================================== | 84% 28 MB
|========================================================== | 85% 28 MB
|========================================================== | 85% 28 MB
|=========================================================== | 85% 28 MB
|=========================================================== | 85% 28 MB
|=========================================================== | 86% 28 MB
|=========================================================== | 86% 28 MB
|=========================================================== | 86% 28 MB
|=========================================================== | 86% 28 MB
|=========================================================== | 86% 29 MB
|============================================================ | 87% 29 MB
|============================================================ | 87% 29 MB
|============================================================ | 87% 29 MB
|============================================================ | 87% 29 MB
|============================================================ | 87% 29 MB
|============================================================ | 88% 29 MB
|============================================================ | 88% 29 MB
|============================================================= | 88% 29 MB
|============================================================= | 88% 29 MB
|============================================================= | 88% 29 MB
|============================================================= | 89% 29 MB
|============================================================= | 89% 29 MB
|============================================================= | 89% 29 MB
|============================================================= | 89% 30 MB
|============================================================== | 89% 30 MB
|============================================================== | 90% 30 MB
|============================================================== | 90% 30 MB
|============================================================== | 90% 30 MB
|============================================================== | 90% 30 MB
|============================================================== | 90% 30 MB
|============================================================== | 91% 30 MB
|=============================================================== | 91% 30 MB
|=============================================================== | 91% 30 MB
|=============================================================== | 91% 30 MB
|=============================================================== | 91% 30 MB
|=============================================================== | 92% 30 MB
|=============================================================== | 92% 30 MB
|=============================================================== | 92% 30 MB
|================================================================ | 92% 31 MB
|================================================================ | 93% 31 MB
|================================================================ | 93% 31 MB
|================================================================ | 93% 31 MB
|================================================================ | 93% 31 MB
|================================================================ | 93% 31 MB
|================================================================ | 94% 31 MB
|================================================================= | 94% 31 MB
|================================================================= | 94% 31 MB
|================================================================= | 94% 31 MB
|================================================================= | 94% 31 MB
|================================================================= | 95% 31 MB
|================================================================= | 95% 31 MB
|================================================================= | 95% 31 MB
|================================================================== | 95% 32 MB
|================================================================== | 95% 32 MB
|================================================================== | 96% 32 MB
|================================================================== | 96% 32 MB
|================================================================== | 96% 32 MB
|================================================================== | 96% 32 MB
|================================================================== | 96% 32 MB
|=================================================================== | 97% 32 MB
|=================================================================== | 97% 32 MB
|=================================================================== | 97% 32 MB
|=================================================================== | 97% 32 MB
|=================================================================== | 97% 32 MB
|=================================================================== | 98% 32 MB
|=================================================================== | 98% 32 MB
|====================================================================| 98% 32 MB
|====================================================================| 98% 33 MB
|====================================================================| 98% 33 MB
|====================================================================| 99% 33 MB
|====================================================================| 99% 33 MB
|====================================================================| 99% 33 MB
|====================================================================| 99% 33 MB
|=====================================================================| 100% 33 MB
Warning: number of columns of result is not a multiple of vector length (arg 1)
Warning: 46578 parsing failures.
# A tibble: 5 x 5
    row     col               expected actual
  <int>   <chr>                  <chr>  <chr>
1  1537 Case No no trailing characters     .0
2  1539 Case No no trailing characters     .0
3  1540 Case No no trailing characters     .0
4  1541 Case No no trailing characters     .0
5  1542 Case No no trailing characters     .0
# ... with 1 more variables: file <chr>
See problems(...) for more details.
# Compact overview of the raw bookings columns and their parsed types
dplyr::glimpse(x = bk)
Observations: 167,633
Variables: 30
$ Name <chr> "HOOD,AARON JAY", "LAWYER,KENNETH A", "AGUILAR TOR...
$ Booked <dttm> 2000-01-01 02:12:00, 2000-01-01 04:01:00, 2000-01...
$ Location <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ DOB <date> 1975-10-08, 1958-09-29, 1972-11-14, 1953-06-07, 1...
$ Race <chr> "W", "W", "W", "W", "W", "W", "W", "W", "W", "W", ...
$ Sex <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", ...
$ Case No <int> 991126052, 991001313, 991126053, 991031932, 991102...
$ Arresting Agency <chr> "UNIVERSITY OF COLORADO", "JAIL MITTS ONLY", "UNIV...
$ Arrest Date <date> 1999-12-31, 2000-01-16, 1999-12-31, 1999-12-31, 1...
$ camping <chr> "False", "False", "False", "False", "False", "Fals...
$ fta <chr> "True", "False", "False", "False", "False", "False...
$ ftc <chr> "False", "False", "False", "False", "False", "Fals...
$ booking_time <dttm> 2000-01-01 02:12:00, 2000-01-01 04:01:00, 2000-01...
$ boulder <chr> "False", "False", "False", "False", "False", "Fals...
$ urination <chr> "False", "False", "False", "False", "False", "Fals...
$ vehicle_as_residence <chr> "False", "False", "False", "False", "False", "Fals...
$ public_obstruct <chr> "False", "False", "False", "False", "False", "Fals...
$ public_trespass <chr> "False", "False", "False", "False", "False", "Fals...
$ begging <chr> "False", "False", "False", "False", "False", "Fals...
$ antihomeless <chr> "False", "False", "False", "False", "False", "Fals...
$ smoking <chr> "False", "False", "False", "False", "False", "Fals...
$ any_antihomeless <chr> "False", "False", "False", "False", "False", "Fals...
$ Address <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ City <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ State <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ ZIP Code <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ Booking Date <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ Booking Time <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ Facility <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ transient <chr> "False", "False", "False", "False", "False", "Fals...
# 'Booking Time' appears to be a duplicate/bad column
bk <- bk %>% select(-`Booking Time`)
# make column names lowercase and replace spaces with underscores
names(bk) <- gsub(' ', '_', tolower(names(bk)))
# some vars make more sense as factors
bk$arresting_agency <- as.factor(bk$arresting_agency)
bk$location <- as.factor(bk$location)
# add wkday, month, year so we can aggregate by those.
# wkday is taken from `booked` rather than `booking_date`: `booking_date` is
# NA in the rows shown by glimpse, which left wkday NA for those bookings.
bk$wkday <- lubridate::wday(bk$booked, label = TRUE)
# NOTE(review): month_ and year are still derived from arrest_date, not from
# the booking timestamp -- confirm that is the intended reference date.
bk$month_ <- lubridate::month(bk$arrest_date, label = TRUE)
bk$year <- lubridate::year(bk$arrest_date)
# modern reporting seems to start in 2000 (yearly totals go from fewer than
# 100 before 2000 to the order of 10,000 starting with 2000). Keep only 2000
# onward here; rows whose year is NA are dropped by filter() as well.
bk <- bk %>% filter(year >= 2000)
# calendar date of the booking timestamp
bk$booked_date <- lubridate::date(bk$booked)
# Coerce a column to a logical vector via as.logical()
# (e.g. "True"/"False" strings become TRUE/FALSE; NA stays NA).
to_log <- function(a_col) as.logical(x = a_col)
# Flag columns that were read as "True"/"False" text and should be logical
cols_to_log <- c(
  'camping', 'boulder', 'urination', 'vehicle_as_residence',
  'public_obstruct', 'public_trespass', 'begging', 'antihomeless',
  'smoking', 'any_antihomeless', 'transient', 'fta', 'ftc'
)
# Convert each of those columns in place, then show the cleaned-up table
bk[cols_to_log] <- lapply(X = bk[cols_to_log], FUN = to_log)
dplyr::glimpse(x = bk)
Observations: 163,939
Variables: 33
$ name <chr> "LAWYER,KENNETH A", "COPELAND,MARK WILLIAM", "DITZ...
$ booked <dttm> 2000-01-01 04:01:00, 2000-01-01 03:39:00, 2000-01...
$ location <fctr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ dob <date> 1958-09-29, 1953-01-27, 1948-01-25, 1977-03-15, 1...
$ race <chr> "W", "W", "W", "W", "W", "W", "W", "W", "W", "W", ...
$ sex <chr> "M", "M", "M", "F", "M", "M", "M", "F", "M", "M", ...
$ case_no <int> 991001313, 1089421, 1031978, 1103574, 5, 1101849, ...
$ arresting_agency <fctr> JAIL MITTS ONLY, LAFAYETTE PD, BOULDER PD, LONGMO...
$ arrest_date <date> 2000-01-16, 2000-01-01, 2000-01-01, 2000-01-01, 2...
$ camping <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ fta <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FA...
$ ftc <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ booking_time <dttm> 2000-01-01 04:01:00, 2000-01-01 03:39:00, 2000-01...
$ boulder <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ urination <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ vehicle_as_residence <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ public_obstruct <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ public_trespass <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ begging <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ antihomeless <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ smoking <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ any_antihomeless <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ address <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ city <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ state <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ zip_code <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ booking_date <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ facility <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ transient <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ wkday <ord> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ month_ <ord> Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, ...
$ year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 20...
$ booked_date <date> 2000-01-01, 2000-01-01, 2000-01-01, 2000-01-01, 2...
# Percentage of bookings flagged transient (NA values ignored).
100 * mean(bk$transient, na.rm = TRUE)
[1] 13.05669
# Percentage of bookings with the `boulder` flag set.
100 * mean(bk$boulder)
[1] 6.589646
# List the distinct arresting agencies (factor levels).
bk$arresting_agency %>% levels()
[1] "BOULDER COUNTY DRUG TASK FORCE" "BOULDER COUNTY SHERIFFS OFFICE"
[3] "BOULDER PD" "COLORADO STATE PATROL"
[5] "COMMUNITY CORRECTIONS" "DISTRICT ATTORNEYS OFFICE"
[7] "ERIE PD" "JAIL MITTS ONLY"
[9] "LAFAYETTE PD" "LONGMONT PD"
[11] "LOUISVILLE PD" "NEDERLAND MARSHALS OFFICE"
[13] "OTHER" "PAROLE"
[15] "STATE DIVISION OF WILDLIFE" "UNIVERSITY OF COLORADO"
[17] "WARD MARSHALS OFFICE"
# Bookings per arresting agency as a horizontal bar chart; bars are ordered
# by count, so the busiest agency ends up at the top after coord_flip().
bk %>%
  count(arresting_agency) %>%
  arrange(desc(n)) %>%
  ggplot(aes(x = reorder(arresting_agency, n), y = n)) +
  geom_col(aes(fill = arresting_agency)) +
  coord_flip()
# Monthly booking counts, one panel per year, for 2011 through 2015.
# The years are given as a contiguous range so that none can be skipped or
# listed twice (2014 must be included).
bk %>%
  filter(year %in% 2011:2015) %>%
  group_by(year, month_) %>%
  tally() %>%
  ggplot(aes(month_, n)) +
  geom_point() +
  geom_col(aes(fill = month_)) +
  facet_wrap(~year)
# Total counts per weekday across the whole table; rows without a weekday
# are excluded.
bk %>%
  filter(!is.na(wkday)) %>%
  count(wkday) %>%
  ggplot(aes(x = wkday, y = n)) +
  geom_col(aes(fill = wkday)) +
  labs(title = 'Total Arrests By Day, for ALL data')
# Counts per weekday, one panel per year (2000 onward); rows without a
# weekday are excluded.
bk %>%
  filter(!is.na(wkday), year > 1999) %>%
  count(year, wkday) %>%
  ggplot(aes(x = wkday, y = n)) +
  geom_col(aes(fill = wkday)) +
  facet_wrap(~year) +
  labs(title = 'Total Arrests By Day, for each year')
# Counts per weekday, one panel per calendar month, pooled over all years
# from 2000 onward; rows without a weekday are excluded.
bk %>%
  filter(!is.na(wkday), year > 1999) %>%
  count(month_, wkday) %>%
  ggplot(aes(x = wkday, y = n)) +
  geom_col(aes(fill = wkday)) +
  facet_wrap(~month_) +
  labs(title = 'Total Arrests By Day, for each month, includes all years')
# Daily arrest counts over time (after 2000-01-01) with a linear trend line.
bk %>%
  filter(arrest_date > as.Date("2000-01-01")) %>%
  group_by(arrest_date) %>%
  tally() %>%
  ggplot(aes(arrest_date, n)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm") +
  # Zoom with coord_cartesian() instead of ylim(): ylim() removes days with
  # n > 60 from the data before the linear fit is computed, which biases the
  # trend line; coord_cartesian() only changes the visible window.
  coord_cartesian(ylim = c(0, 60))
NA
# Attach the weather columns to every booking, matching the booking's
# arrest_date to the weather table's date; bookings without a matching
# weather row are kept (left join).
bk_wea <- bk %>%
  left_join(wea, by = c('arrest_date' = 'date'))

# Inspect the joined table.
glimpse(bk_wea)
Observations: 163,939
Variables: 37
$ name <chr> "LAWYER,KENNETH A", "COPELAND,MARK WILLIAM", "DITZ...
$ booked <dttm> 2000-01-01 04:01:00, 2000-01-01 03:39:00, 2000-01...
$ location <fctr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ dob <date> 1958-09-29, 1953-01-27, 1948-01-25, 1977-03-15, 1...
$ race <chr> "W", "W", "W", "W", "W", "W", "W", "W", "W", "W", ...
$ sex <chr> "M", "M", "M", "F", "M", "M", "M", "F", "M", "M", ...
$ case_no <int> 991001313, 1089421, 1031978, 1103574, 5, 1101849, ...
$ arresting_agency <fctr> JAIL MITTS ONLY, LAFAYETTE PD, BOULDER PD, LONGMO...
$ arrest_date <date> 2000-01-16, 2000-01-01, 2000-01-01, 2000-01-01, 2...
$ camping <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ fta <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FA...
$ ftc <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ booking_time <dttm> 2000-01-01 04:01:00, 2000-01-01 03:39:00, 2000-01...
$ boulder <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ urination <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ vehicle_as_residence <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ public_obstruct <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ public_trespass <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ begging <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ antihomeless <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ smoking <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ any_antihomeless <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ address <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ city <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ state <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ zip_code <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ booking_date <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ facility <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ transient <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F...
$ wkday <ord> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ month_ <ord> Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, ...
$ year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 20...
$ booked_date <date> 2000-01-01, 2000-01-01, 2000-01-01, 2000-01-01, 2...
$ prcp <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ snow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ tmax <dbl> 49, 54, 54, 54, 54, 54, 54, 51, 54, 54, 54, 49, 54...
$ tmin <dbl> 22, 29, 29, 29, 29, 29, 29, 35, 29, 29, 29, 22, 29...
# plot_daily_count_vs_tmin(): scatter plot of the number of bookings per
# arrest_date against that day's tmin, with a linear fit.
#   bookings  data frame with an arrest_date column (one row per booking)
#   returns   a ggplot object
# Weather is looked up in the global `wea` table by date. Only dates that
# have at least one row in `bookings` are counted; dates with no bookings do
# not appear as zeros.
plot_daily_count_vs_tmin <- function(bookings) {
  bookings %>%
    group_by(arrest_date) %>%
    tally() %>%
    left_join(wea, by = c('arrest_date' = 'date')) %>%
    ggplot(aes(tmin, n)) +
    geom_point() +
    geom_smooth(method = 'lm')
}

# All bookings.
plot_daily_count_vs_tmin(bk)

# Bookings flagged transient only.
bk %>%
  filter(transient == TRUE) %>%
  plot_daily_count_vs_tmin()

# Bookings flagged antihomeless only.
bk %>%
  filter(antihomeless == TRUE) %>%
  plot_daily_count_vs_tmin()

# Bookings from 2002 only. Starts from bk like the plots above: the helper
# joins the weather table itself, so the pre-joined bk_wea is not needed.
bk %>%
  filter(year == 2002) %>%
  plot_daily_count_vs_tmin()
# Overlay, on a shared y axis, each day's tmin (default point colour) and
# its booking count (red) against arrest_date.
bk %>%
  count(arrest_date) %>%
  left_join(wea, by = c('arrest_date' = 'date')) %>%
  ggplot(aes(arrest_date, tmin)) +
  geom_point() +
  geom_point(aes(y = n), color = 'red')